/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "../include/spellingtable.h"

namespace ime_pinyin {

#ifdef ___BUILD_MODEL___

    const char SpellingTable::
    kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"};

    // "" is the biggest, so that all empty strings will be moved to the end
    // _eb mean empty is biggest
    int compare_raw_spl_eb ( const void *p1, const void *p2 ) {
        if ( '\0' == ( static_cast<const RawSpelling *> ( p1 ) )->str[0] )
        { return 1; }
        if ( '\0' == ( static_cast<const RawSpelling *> ( p2 ) )->str[0] )
        { return -1; }
        return strcmp ( ( static_cast<const RawSpelling *> ( p1 ) )->str,
                        ( static_cast<const RawSpelling *> ( p2 ) )->str );
    }

    Size_t get_odd_next ( Size_t value ) {
        Size_t v_next = value;
        while ( true ) {
            Size_t v_next_sqrt = ( Size_t ) sqrt ( v_next );
            bool is_odd = true;
            for ( Size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++ ) {
                if ( v_next % v_dv == 0 ) {
                    is_odd = false;
                    break;
                }
            }
            if ( is_odd )
            { return v_next; }
            v_next++;
        }
        // never reach here
        return 0;
    }

    SpellingTable::SpellingTable() {
        need_score_ = false;
        raw_spellings_ = NULL;
        spelling_buf_ = NULL;
        spelling_num_ = 0;
        total_freq_ = 0;
        frozen_ = true;
    }

    SpellingTable::~SpellingTable() {
        free_resource();
    }

    Size_t SpellingTable::get_hash_pos ( const char *spelling_str ) {
        Size_t hash_pos = 0;
        for ( Size_t pos = 0; pos < spelling_size_; pos++ ) {
            if ( '\0' == spelling_str[pos] )
            { break; }
            hash_pos += ( Size_t ) spelling_str[pos];
        }
        hash_pos = hash_pos % spelling_max_num_;
        return hash_pos;
    }

    Size_t SpellingTable::hash_pos_next ( Size_t hash_pos ) {
        hash_pos += 123;
        hash_pos = hash_pos % spelling_max_num_;
        return hash_pos;
    }

    void SpellingTable::free_resource() {
        if ( NULL != raw_spellings_ )
        { delete [] raw_spellings_; }
        raw_spellings_ = NULL;
        if ( NULL != spelling_buf_ )
        { delete [] spelling_buf_; }
        spelling_buf_ = NULL;
    }

    bool SpellingTable::init_table ( Size_t pure_spl_size, Size_t spl_max_num,
                                     bool need_score ) {
        if ( pure_spl_size == 0 || spl_max_num == 0 )
        { return false; }
        need_score_ = need_score;
        free_resource();
        spelling_size_ = pure_spl_size + 1;
        if ( need_score )
        { spelling_size_ += 1; }
        spelling_max_num_ = get_odd_next ( spl_max_num );
        spelling_num_ = 0;
        raw_spellings_ = new RawSpelling[spelling_max_num_];
        spelling_buf_ = new char[spelling_max_num_ * ( spelling_size_ )];
        if ( NULL == raw_spellings_ || NULL == spelling_buf_ ) {
            free_resource();
            return false;
        }
        memset ( raw_spellings_, 0, spelling_max_num_ * sizeof ( RawSpelling ) );
        memset ( spelling_buf_, 0, spelling_max_num_ * ( spelling_size_ ) );
        frozen_ = false;
        total_freq_ = 0;
        return true;
    }

    bool SpellingTable::put_spelling ( const char *spelling_str, double freq ) {
        if ( frozen_ || NULL == spelling_str )
        { return false; }
        for ( Size_t pos = 0; pos < kNotSupportNum; pos++ ) {
            if ( strcmp ( spelling_str, kNotSupportList[pos] ) == 0 ) {
                return false;
            }
        }
        total_freq_ += freq;
        Size_t hash_pos = get_hash_pos ( spelling_str );
        raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
        if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str,
                       spelling_size_ - 1 ) == 0 ) {
            raw_spellings_[hash_pos].freq += freq;
            return true;
        }
        Size_t hash_pos_ori = hash_pos;
        while ( true ) {
            if ( strncmp ( raw_spellings_[hash_pos].str,
                           spelling_str, spelling_size_ - 1 ) == 0 ) {
                raw_spellings_[hash_pos].freq += freq;
                return true;
            }
            if ( '\0' == raw_spellings_[hash_pos].str[0] ) {
                raw_spellings_[hash_pos].freq += freq;
                strncpy ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 );
                raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
                spelling_num_++;
                return true;
            }
            hash_pos = hash_pos_next ( hash_pos );
            if ( hash_pos_ori == hash_pos )
            { return false; }
        }
        // never reach here
        return false;
    }

    bool SpellingTable::contain ( const char *spelling_str ) {
        if ( NULL == spelling_str || NULL == spelling_buf_ || frozen_ )
        { return false; }
        Size_t hash_pos = get_hash_pos ( spelling_str );
        if ( '\0' == raw_spellings_[hash_pos].str[0] )
        { return false; }
        if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 )
             == 0 )
        { return true; }
        Size_t hash_pos_ori = hash_pos;
        while ( true ) {
            hash_pos = hash_pos_next ( hash_pos );
            if ( hash_pos_ori == hash_pos )
            { return false; }
            if ( '\0' == raw_spellings_[hash_pos].str[0] )
            { return false; }
            if ( strncmp ( raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1 )
                 == 0 )
            { return true; }
        }
        // never reach here
        return false;
    }

    const char *SpellingTable::arrange ( Size_t *item_size, Size_t *spl_num ) {
        if ( NULL == raw_spellings_ || NULL == spelling_buf_ ||
             NULL == item_size || NULL == spl_num )
        { return NULL; }
        qsort ( raw_spellings_, spelling_max_num_, sizeof ( RawSpelling ),
                compare_raw_spl_eb );
        // After sorting, only the first spelling_num_ items are valid.
        // Copy them to the destination buffer.
        for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
            strncpy ( spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str,
                      spelling_size_ );
        }
        if ( need_score_ ) {
            if ( kPrintDebug0 )
            { printf ( "------------Spelling Possiblities--------------\n" ); }
            double max_score = 0;
            double min_score = 0;
            // After sorting, only the first spelling_num_ items are valid.
            for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
                raw_spellings_[pos].freq /= total_freq_;
                if ( need_score_ ) {
                    if ( 0 == pos ) {
                        max_score = raw_spellings_[0].freq;
                        min_score = max_score;
                    } else {
                        if ( raw_spellings_[pos].freq > max_score )
                        { max_score = raw_spellings_[pos].freq; }
                        if ( raw_spellings_[pos].freq < min_score )
                        { min_score = raw_spellings_[pos].freq; }
                    }
                }
            }
            if ( kPrintDebug0 )
            { printf ( "-----max psb: %f, min psb: %f\n", max_score, min_score ); }
            max_score = log ( max_score );
            min_score = log ( min_score );
            if ( kPrintDebug0 )
                printf ( "-----max log value: %f, min log value: %f\n",
                         max_score, min_score );
            // The absolute value of min_score is bigger than that of max_score because
            // both of them are negative after log function.
            score_amplifier_ = 1.0 * 255 / min_score;
            double average_score = 0;
            for ( Size_t pos = 0; pos < spelling_num_; pos++ ) {
                double score = log ( raw_spellings_[pos].freq ) * score_amplifier_;
                assert ( score >= 0 );
                average_score += score;
                // Because of calculation precision issue, score might be a little bigger
                // than 255 after being amplified.
                if ( score > 255 )
                { score = 255; }
                char *this_spl_buf = spelling_buf_ + pos * spelling_size_;
                this_spl_buf[spelling_size_ - 1] =
                    static_cast<char> ( ( unsigned char ) score );
                if ( kPrintDebug0 ) {
                    printf ( "---pos:%d, %s, psb:%d\n", pos, this_spl_buf,
                             ( unsigned char ) this_spl_buf[spelling_size_ - 1] );
                }
            }
            average_score /= spelling_num_;
            assert ( average_score <= 255 );
            average_score_ = static_cast<uint8> ( average_score );
            if ( kPrintDebug0 )
                printf ( "\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_,
                         average_score_ );
        }
        *item_size = spelling_size_;
        *spl_num = spelling_num_;
        frozen_ = true;
        return spelling_buf_;
    }

    float SpellingTable::get_score_amplifier() {
        return static_cast<float> ( score_amplifier_ );
    }

    unsigned char SpellingTable::get_average_score() {
        return average_score_;
    }

#endif  // ___BUILD_MODEL___
}  // namespace ime_pinyin
